# We will use this to explore the distribution
# of sample means.

# First, let us create a large population.
# NOTE(review): gnrnd5() is defined in ../gnrnd5.R; everything below
# assumes it leaves the generated population in the vector L1.
source( "../gnrnd5.R")
gnrnd5(156437499904, 413003074)

# Just for our reference, let us look at the
# mean and standard deviation of this population.
pop_mean <- mean(L1)
pop_mean
# pop_sd() is defined in ../pop_sd.R.
source( "../pop_sd.R")
pop_sigma <- pop_sd(L1)
pop_sigma

# Now, get 10000 samples of that population
# and, for each sample, save the sample mean.
# We will get samples of size 36.
samp_size <- 36

# L2 collects the sample means. It is preallocated as a numeric
# vector; every slot is overwritten by the loop below.
L2 <- numeric(10000)
for (i in seq_along(L2)) {
  # sample() draws samp_size values from L1 without replacement.
  L3 <- sample(L1, samp_size)
  samp_mean <- mean(L3)
  L2[i] <- samp_mean
}

# We maintain that the distribution of sample
# means for samples of this size will be
# normal, with the same mean as the population,
# and that the distribution of the sample means
# will have a standard deviation equal to
# the population standard deviation divided by
# the square root of the sample size, i.e.,
#     N( pop_mean, pop_sigma/sqrt(samp_size) )

# Let us see what we have.
# The mean of the means is
mean(L2)
# which should be remarkably close to
pop_mean
# the population mean.

# The standard deviation of the sample means is
pop_sd(L2)
# which should be remarkably close to
pop_sigma / sqrt(samp_size)

# So the mean and standard deviation of the
# sample means are right on target. But is
# the distribution normal?
hist(L2)
hist(L2, main = "Distribution of Sample Means", breaks = 30)
boxplot(L2, horizontal = TRUE)
# assess_normality() is defined in ../assess_normality.R.
source( "../assess_normality.R")
assess_normality(L2)

###############################
# That was great, but now go back and
# try the same thing for samples of
# size 29 (just change the samp_size <- 36
# assignment above and then re-run everything
# from there through assess_normality( L2 )).
####################################

############################################################
# I wonder if this had anything to do with
# the distribution of the original population.
hist(L1)
boxplot(L1, horizontal = TRUE)
assess_normality(L1)
# Clearly the original population was
# normal.
# Let us try all of the same work,
# but this time for a population that is
# definitely not normal.
# This section reuses gnrnd5(), pop_sd() and assess_normality(),
# which are source()'d earlier in this script.
gnrnd5(156437499901, 1418002352)

# First look at the population to be sure
# it is not normal.
hist(L1)
boxplot(L1, horizontal = TRUE)
assess_normality(L1)
# Clearly not normal.

# Then get our new population values.
pop_mean <- mean(L1)
pop_mean
pop_sigma <- pop_sd(L1)
pop_sigma

# Now, get 10000 samples of that population
# and, for each sample, save the sample mean.
# We will get samples of size 36.
samp_size <- 36

# L2 collects the sample means. It is preallocated as a numeric
# vector; every slot is overwritten by the loop below.
L2 <- numeric(10000)
for (i in seq_along(L2)) {
  # sample() draws samp_size values from L1 without replacement.
  L3 <- sample(L1, samp_size)
  samp_mean <- mean(L3)
  L2[i] <- samp_mean
}

# We maintain that the distribution of sample
# means for samples of this size will be
# normal, with the same mean as the population,
# and that the distribution of the sample means
# will have a standard deviation equal to
# the population standard deviation divided by
# the square root of the sample size, i.e.,
#     N( pop_mean, pop_sigma/sqrt(samp_size) )

# Let us see what we have.
# The mean of the means is
mean(L2)
# which should be remarkably close to
pop_mean
# the population mean.

# The standard deviation of the sample means is
pop_sd(L2)
# which should be remarkably close to
pop_sigma / sqrt(samp_size)

# So the mean and standard deviation of the
# sample means are right on target. But is
# the distribution normal?
hist(L2)
hist(L2, main = "Distribution of Sample Means", breaks = 30)
boxplot(L2, horizontal = TRUE)
assess_normality(L2)

# So the distribution of the sample means
# is normal with the same expected values for
# the mean of the mean values and the
# standard deviation of the mean values.
###################################
# You could go back to the samp_size <- 36
# assignment in this section and change
# the size of the sample to see that this
# does not depend on the sample size (within
# reason...you want to stay away from really
# small samples...if the population is not
# approximately normal, as is this case, then
# you want to have samples of 30 or more as a
# general rule).
#####################################
#### Let us try a different population.
#### Again, choose one that is not normal.
# This section reuses gnrnd5(), pop_sd() and assess_normality(),
# which are source()'d earlier in this script.
gnrnd5(156437499902, 1418002352)

# First look at the population to be sure
# it is not normal.
hist(L1)
boxplot(L1, horizontal = TRUE)
assess_normality(L1)
# Clearly not normal.

# Then get our new population values.
pop_mean <- mean(L1)
pop_mean
pop_sigma <- pop_sd(L1)
pop_sigma

# Now, get 10000 samples of that population
# and, for each sample, save the sample mean.
# We will get samples of size 36.
samp_size <- 36

# L2 collects the sample means. It is preallocated as a numeric
# vector; every slot is overwritten by the loop below.
L2 <- numeric(10000)
for (i in seq_along(L2)) {
  # sample() draws samp_size values from L1 without replacement.
  L3 <- sample(L1, samp_size)
  samp_mean <- mean(L3)
  L2[i] <- samp_mean
}

# We maintain that the distribution of sample
# means for samples of this size will be
# normal, with the same mean as the population,
# and that the distribution of the sample means
# will have a standard deviation equal to
# the population standard deviation divided by
# the square root of the sample size, i.e.,
#     N( pop_mean, pop_sigma/sqrt(samp_size) )

# Let us see what we have.
# The mean of the means is
mean(L2)
# which should be remarkably close to
pop_mean
# the population mean.

# The standard deviation of the sample means is
pop_sd(L2)
# which should be remarkably close to
pop_sigma / sqrt(samp_size)

# So the mean and standard deviation of the
# sample means are right on target. But is
# the distribution normal?
hist(L2)
hist(L2, main = "Distribution of Sample Means", breaks = 30)
boxplot(L2, horizontal = TRUE)
assess_normality(L2)

# So the distribution of the sample means
# is normal with the same expected values for
# the mean of the mean values and the
# standard deviation of the mean values.
###################################
# You could go back to the samp_size <- 36
# assignment in this section and change
# the size of the sample to see that this
# does not depend on the sample size (again,
# within reason).